# Use admission_data.csv for this exercise.
# Load and view first few lines of dataset
import pandas as pd
import numpy as np
# Read the admissions data; later steps use its gender, major and admitted
# columns.
df = pd.read_csv("admission_data.csv")
df.head()

# Admitted / not-admitted counts broken down by gender
df.groupby(['gender']).admitted.value_counts()

# Female admission rate. This was previously the literal 74/(74+183),
# presumably copied by hand from the table above (the figures match the
# per-major female counts used later); computing it from the data keeps it
# correct if the CSV changes.
(df[df.gender == 'female'].admitted == True).mean()
# Share of all rows belonging to each gender.
# value_counts() orders its result by frequency, so picking entries by
# position ([1] / [0]) silently swaps the answers depending on which gender
# is larger, and integer-position lookup on a string-labelled Series is
# deprecated in recent pandas. Look the values up by label instead.
rows = df.shape[0]
gender_share = df.gender.value_counts() / rows
# Proportion of students that are female
gender_share['female']
# Proportion of students that are male
gender_share['male']
# Admission rate for females: admitted female rows over all female rows
female_students = df[df.gender == 'female']
len(female_students[female_students.admitted == True]) / len(female_students)
# Admission rate for males: admitted male rows over all male rows
male_students = df[df.gender == 'male']
len(male_students[male_students.admitted == True]) / len(male_students)
# Head-count per gender (reused by the per-major proportions below),
# followed by the overall shape of the dataset.
gender_counts = df.gender.value_counts()
total_females = gender_counts['female']
total_males = gender_counts['male']
print(total_females)
print(total_males)
print(df.shape)
# Restrict the data to physics majors and count them by gender
physics = df.loc[df.major == 'Physics']
physics_by_gender = physics.gender.value_counts()
print(physics.shape)
print(physics_by_gender)
print()
# What proportion of female students are majoring in physics?
print(physics_by_gender['female'] / total_females)
# What proportion of male students are majoring in physics?
physics_by_gender['male'] / total_males
# Admitted physics majors: overall count and count per gender
admitted_physics = physics[physics.admitted == True]
total_admitted_for_physics = admitted_physics.admitted.sum()
females_admitted_for_physics = len(admitted_physics[admitted_physics.gender == 'female'])
males_admitted_for_physics = len(admitted_physics[admitted_physics.gender == 'male'])

# An admission rate is "admitted / everyone of that gender in the major".
# Dividing by total_admitted_for_physics (as before) instead yields each
# gender's share of the admitted physics students, which is a different
# quantity from the one the labels describe.
# Admission rate for female physics majors
females_admitted_for_physics / len(physics[physics.gender == 'female'])
# Admission rate for male physics majors
males_admitted_for_physics / len(physics[physics.gender == 'male'])

# Raw admitted / not-admitted counts per gender, for cross-checking the
# rates above (this replaces the hand-typed 23/31 that used to follow).
physics.groupby('gender').admitted.value_counts()
# Restrict the data to chemistry majors and count them by gender
chem = df.loc[df.major == 'Chemistry']
chem_by_gender = chem.gender.value_counts()
print(chem.shape)
print(chem_by_gender)
print()
# Gender head-counts over the whole dataset, used as the denominators
all_by_gender = df.gender.value_counts()
# What proportion of female students are majoring in chemistry?
chem_by_gender['female'] / all_by_gender['female']
# What proportion of male students are majoring in chemistry?
chem_by_gender['male'] / all_by_gender['male']
# Raw admitted / not-admitted counts per gender among chemistry majors
chem.groupby('gender').admitted.value_counts()
# The rates below are computed from the data instead of from numbers typed
# in by hand from the table above, so they stay correct if the data changes.
# Admission rate for female chemistry majors
(chem[chem.gender == 'female'].admitted == True).mean()
# Admission rate for male chemistry majors
(chem[chem.gender == 'male'].admitted == True).mean()
# Gender counts for every (major, admitted) combination
df.groupby(['major', 'admitted']).gender.value_counts()
# An admission rate for a major is "admitted in that major / everyone in
# that major". Dividing by the number of admitted students across all
# majors (as before) gives the major's share of admissions instead.
# Admission rate for physics majors
(df[df.major == 'Physics'].admitted == True).mean()
# Admission rate for chemistry majors
(df[df.major == 'Chemistry'].admitted == True).mean()